package per.zzzfwd.douban.crawler;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

import java.net.URL;

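/**
 * A small Jsoup-based page crawler with a configurable connect/read timeout
 * and an optional delay applied before each request.
 *
 * <p>A minimal usage sketch (the target URL below is a placeholder, not part
 * of this project):</p>
 * <pre>{@code
 * Crawler crawler = new Crawler(10000, 1000L); // 10 s timeout, 1 s delay per request
 * Document doc = crawler.crawl("https://example.com/");
 * System.out.println(doc.title());
 * }</pre>
 */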
public class Crawler {

    /**
     * Connect and read timeout, in milliseconds.
     */
    private int timeoutMillis;

    /**
     * Delay before each crawl, in milliseconds.
     */
    private long delay;

    public Crawler() {
        this(10000); // default: 10-second connect/read timeout
    }

    public Crawler(int timeoutMillis) {
        this.timeoutMillis = timeoutMillis;
    }

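    /**
     * Note: overload resolution selects this constructor only for a {@code long}
     * argument, e.g. {@code new Crawler(1000L)}; an {@code int} literal such as
     * {@code new Crawler(1000)} selects the timeout constructor above.
     */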
    public Crawler(long delay) {
        this(10000, delay);
    }

    public Crawler(int timeoutMillis, long delay) {
        this.timeoutMillis = timeoutMillis;
        this.delay = delay;
    }

    /**
     * Fetches and parses the page at the given URL.
     *
     * @param url the URL of the page to fetch
     * @return the parsed {@link Document}
     * @throws CrawlException if the URL is blank or the fetch fails
     */
    public Document crawl(String url) {
        if (url == null || url.trim().isEmpty()) {
            throw new CrawlException("url is empty; nothing to crawl");
        }

        if (delay > 0L) {
            try {
                Thread.sleep(delay);
            } catch (InterruptedException e) {
                // Restore the interrupt flag instead of swallowing the interruption
                Thread.currentThread().interrupt();
                throw new CrawlException("Interrupted while waiting to crawl [" + url + "]", e);
            }
        }

        try {
            // Fetch the URL with the configured timeout and parse the response
            return Jsoup.parse(new URL(url), timeoutMillis);
        } catch (Exception e) {
            throw new CrawlException("Failed to crawl page [" + url + "]", e);
        }
    }

    public void setTimeoutMillis(int timeoutMillis) {
        this.timeoutMillis = timeoutMillis;
    }

    public int getTimeoutMillis() {
        return timeoutMillis;
    }

    public long getDelay() {
        return delay;
    }

    public void setDelay(long delay) {
        this.delay = delay;
    }
}
